1 Downloading Data

This is the exploratory data analysis of Turkey’s incoming tourist’s nationalities between 2008 and 2020. The data is downloaded from TCMB’s website and translated to English.

raw_df <- read_excel("milliyetlere_gore_ziyaretci_sayisi.xlsx")

raw_df %>% glimpse()
## Rows: 150
## Columns: 103
## $ Date                                  <chr> "2008-01", "2008-02", "2008-0...
## $ Germany                               <dbl> 177233, 143666, 249797, 24253...
## $ Albania                               <dbl> 2811, 2604, 3626, 3219, 4156,...
## $ Austria                               <dbl> 20207, 16295, 23558, 22668, 3...
## $ Belgium                               <dbl> 12389, 11309, 21097, 30772, 5...
## $ `Bosnia-Herzegovina`                  <dbl> 2546, 2342, 2952, 3539, 4709,...
## $ Bulgaria                              <dbl> 99048, 82707, 102877, 110627,...
## $ `The Czech Republic`                  <dbl> 1824, 2064, 2524, 4198, 9286,...
## $ Denmark                               <dbl> 5613, 5464, 11288, 10878, 260...
## $ Estonia                               <dbl> 426, 515, 695, 1098, 4508, 46...
## $ Finland                               <dbl> 1921, 1723, 4142, 7661, 12479...
## $ France                                <dbl> 30796, 29228, 36832, 73221, 8...
## $ `South Cyprus`                        <dbl> 296, 369, 435, 527, 888, 898,...
## $ Croatia                               <dbl> 2354, 1738, 2649, 2384, 2997,...
## $ Holland                               <dbl> 32012, 23565, 34660, 47950, 1...
## $ England                               <dbl> 35215, 33385, 46445, 85841, 2...
## $ Ireland                               <dbl> 2086, 1723, 3593, 3846, 11936...
## $ Spain                                 <dbl> 7724, 8331, 27664, 20599, 319...
## $ Sweden                                <dbl> 8156, 5808, 11248, 14843, 474...
## $ Switzerland                           <dbl> 9655, 8947, 13368, 14681, 224...
## $ Italy                                 <dbl> 24918, 15204, 25805, 34936, 5...
## $ Iceland                               <dbl> 72, 99, 367, 622, 463, 2065, ...
## $ Montenegro                            <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ Kosovo                                <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ Latvia                                <dbl> 1126, 1309, 2050, 2251, 6950,...
## $ Lithuania                             <dbl> 1076, 1198, 2262, 3400, 14423...
## $ Luxemburg                             <dbl> 164, 155, 341, 394, 2813, 606...
## $ Hungary                               <dbl> 2266, 2281, 3085, 3974, 5364,...
## $ Macedonia                             <dbl> 5480, 5253, 5930, 7985, 8042,...
## $ Malta                                 <dbl> 114, 110, 280, 259, 242, 262,...
## $ Norway                                <dbl> 3488, 3203, 8424, 9692, 22667...
## $ Poland                                <dbl> 4543, 5009, 5100, 9547, 34855...
## $ Portugal                              <dbl> 810, 999, 1849, 2006, 3393, 4...
## $ Romania                               <dbl> 16966, 21941, 21451, 26194, 3...
## $ Serbia                                <dbl> 8449, 7248, 8441, 9357, 11950...
## $ Slovakia                              <dbl> 883, 1379, 1408, 1957, 2815, ...
## $ Slovenia                              <dbl> 1279, 1378, 1322, 1910, 3649,...
## $ Greece                                <dbl> 27656, 21990, 44384, 46099, 5...
## $ `Other Europe Countries`              <dbl> 91, 60, 145, 133, 170, 230, 2...
## $ `Total Europe`                        <dbl> 551693, 470599, 732094, 86179...
## $ Azerbaijan                            <dbl> 29978, 33028, 37111, 35267, 3...
## $ Belarus                               <dbl> 1764, 2999, 3040, 4130, 14623...
## $ Armenie                               <dbl> 2500, 3586, 3731, 4011, 4756,...
## $ Georgia                               <dbl> 40517, 43820, 49739, 57554, 7...
## $ Kazakhstan                            <dbl> 6059, 8777, 8070, 8178, 12683...
## $ Kyrgyzstan                            <dbl> 3859, 3398, 3597, 3474, 3678,...
## $ Moldova                               <dbl> 7769, 9322, 10433, 11654, 150...
## $ Uzbekistan                            <dbl> 3431, 4520, 4133, 3991, 4645,...
## $ Russia                                <dbl> 52741, 46999, 52034, 64565, 2...
## $ Tajikistan                            <dbl> 2965, 4837, 6169, 4361, 3873,...
## $ Turkmenistan                          <dbl> 5221, 5407, 6562, 6825, 6740,...
## $ Ukraine                               <dbl> 20789, 21474, 23654, 29568, 9...
## $ `Total UIS`                           <dbl> 177593, 188167, 208273, 23357...
## $ `United States`                       <dbl> 22581, 18562, 29479, 38922, 8...
## $ Argentina                             <dbl> 662, 612, 789, 916, 2358, 236...
## $ Brazil                                <dbl> 1442, 1446, 1318, 2387, 5157,...
## $ Canada                                <dbl> 3670, 2981, 5090, 7395, 18578...
## $ Colombia                              <dbl> 277, 107, 238, 283, 488, 656,...
## $ Mexico                                <dbl> 720, 546, 1430, 1501, 2287, 2...
## $ Chile                                 <dbl> 185, 554, 248, 527, 1094, 101...
## $ Venezuela                             <dbl> 494, 125, 412, 469, 744, 789,...
## $ `Other America Countries`             <dbl> 1590, 1895, 1857, 2235, 3368,...
## $ `Total America`                       <dbl> 31621, 26828, 40861, 54635, 1...
## $ Algeria                               <dbl> 3719, 2825, 3936, 4898, 5284,...
## $ Morocco                               <dbl> 2088, 2094, 2338, 2774, 3274,...
## $ `South Africa`                        <dbl> 586, 578, 1077, 1267, 2244, 2...
## $ Libya                                 <dbl> 2526, 1871, 2672, 3172, 3379,...
## $ Egypt                                 <dbl> 2320, 3201, 3412, 3441, 4073,...
## $ Sudan                                 <dbl> 329, 220, 336, 330, 562, 931,...
## $ Tunusia                               <dbl> 2541, 2216, 3684, 2877, 4135,...
## $ `East Africa`                         <dbl> 1164, 1600, 1709, 1826, 2807,...
## $ `Total Africa`                        <dbl> 15273, 14605, 19164, 20585, 2...
## $ `United Arab Emirates`                <dbl> 568, 252, 605, 405, 617, 1458...
## $ Bahrain                               <dbl> 194, 166, 247, 232, 323, 520,...
## $ Bangladesh                            <dbl> 226, 357, 245, 224, 267, 356,...
## $ China                                 <dbl> 3848, 4972, 4014, 4586, 6100,...
## $ Indonesia                             <dbl> 545, 486, 910, 1149, 1500, 19...
## $ Philippines                           <dbl> 1248, 2018, 2040, 1971, 2456,...
## $ `South Korea`                         <dbl> 13933, 10652, 8717, 12782, 12...
## $ India                                 <dbl> 2825, 2688, 4180, 4201, 6783,...
## $ Iraq                                  <dbl> 8771, 10418, 13165, 12539, 13...
## $ Iran                                  <dbl> 29120, 36619, 81837, 63238, 8...
## $ Israel                                <dbl> 16500, 19408, 29858, 50992, 4...
## $ Japan                                 <dbl> 10985, 11197, 12858, 11516, 1...
## $ `Turkish Republic of Northern Cyprus` <dbl> 10696, 15090, 11621, 14283, 1...
## $ Qatar                                 <dbl> 91, 105, 113, 158, 154, 272, ...
## $ Kuwait                                <dbl> 275, 478, 256, 896, 884, 1707...
## $ Lebanon                               <dbl> 1836, 1719, 2842, 3939, 2565,...
## $ Malaysia                              <dbl> 1153, 1679, 2424, 1840, 2614,...
## $ Pakistan                              <dbl> 1858, 1685, 1800, 1746, 2659,...
## $ Singapore                             <dbl> 736, 909, 1117, 1302, 1693, 2...
## $ Syria                                 <dbl> 24464, 24949, 27203, 27704, 3...
## $ `Saudi Arabia`                        <dbl> 721, 1499, 1016, 2074, 2025, ...
## $ Thailand                              <dbl> 563, 516, 1003, 1081, 1005, 7...
## $ Jordan                                <dbl> 2787, 2594, 3280, 3455, 4336,...
## $ Yemen                                 <dbl> 116, 299, 287, 331, 444, 426,...
## $ `Other Asia Countries`                <dbl> 2921, 3191, 4073, 4547, 7854,...
## $ `Total Asia`                          <dbl> 136980, 153946, 215711, 22719...
## $ Australia                             <dbl> 4823, 3254, 3571, 10084, 1301...
## $ `New Zealand`                         <dbl> 497, 394, 592, 2249, 2606, 26...
## $ Oceania                               <dbl> 3, 11, 69, 70, 11, 11, 17, 36...
## $ Haymatlos                             <dbl> 1056, 947, 1233, 1304, 1481, ...
## $ `Grand Total`                         <dbl> 919539, 858751, 1221568, 1411...

As it can be seen, the data frame includes 150 observations of 103 groups, 1 of them being the Date (in the form of Year-Month), 96 of them being individual countries and 6 of them being groups of countries.

2 Preprocessing Data

First, we need to convert the date column into dttm format instead of chr.

#Parsing the Date column and changing them to required format
raw_df$Date <- parse_date_time(raw_df$Date, "ym")

raw_df %>% select(Date) %>% glimpse()
## Rows: 150
## Columns: 1
## $ Date <dttm> 2008-01-01, 2008-02-01, 2008-03-01, 2008-04-01, 2008-05-01, 2...
#Let's form another data frame not including the totals
df <- raw_df %>% select(-contains("Total")) 

#These are the unique country names
countries <- df %>% select(-Date) %>% colnames()

3 Data Analysis

3.1 Total visitors from countries by year and month

Let’s analyze which countries visited Turkey in this period the most and the least, both in terms of total tourists and average tourists per month and the number of visitors by those countries. Below are the top 10 countries that visited Turkey the most and the least:

totalsums <- df %>% select(-Date) %>% summarise(across(everything(),sum)) %>% sort(decreasing = TRUE) 
top10_s <- t(totalsums %>% select(1:10))
last10_s <- t(totalsums %>% select(87:96))
top10_s
##              [,1]
## Germany  56480704
## Russia   46934888
## England  28369408
## Bulgaria 21681788
## Iran     20221227
## Georgia  19886761
## Holland  13736849
## France   10688681
## Ukraine  10602414
## Greece    8325941
last10_s
##                          [,1]
## South Cyprus           164311
## Chile                  161156
## Luxemburg              132672
## Sudan                  122162
## Bangladesh             112580
## Venezuela              102757
## Malta                   76191
## Iceland                 73569
## Other Europe Countries  31809
## Oceania                 11428
totalmeans <- df %>% select(-Date) %>% summarise(across(everything(),mean)) %>% sort(decreasing = TRUE) 
top10_m <- t(totalmeans %>% select(1:10))
last10_m <- t(totalmeans %>% select(87:96))
top10_m
##               [,1]
## Germany  376538.03
## Russia   312899.25
## England  189129.39
## Bulgaria 144545.25
## Iran     134808.18
## Georgia  132578.41
## Holland   91578.99
## France    71257.87
## Ukraine   70682.76
## Greece    55506.27
last10_m
##                              [,1]
## South Cyprus           1095.40667
## Chile                  1074.37333
## Luxemburg               884.48000
## Sudan                   814.41333
## Bangladesh              750.53333
## Venezuela               685.04667
## Malta                   507.94000
## Iceland                 490.46000
## Other Europe Countries  212.06000
## Oceania                  76.18667

Interestingly, both lists have the same countries in terms of means and averages, so we can put them in the same data frame.

top10 <- bind_cols(means = top10_m, sums= top10_s, names= rownames(top10_m)) 
last10 <- bind_cols(means = last10_m, sums= last10_s, names= rownames(last10_m)) 


top10  %>% ggplot(aes(x=names, y=means, fill=names)) + geom_col() +theme_minimal() + theme(legend.position = "None") 

last10 %>% ggplot(aes(x=names, y=means,fill=names)) + geom_col() +theme_minimal() +  theme(legend.position = "None") 

We can also check which country visited Turkey the most each year & month:

#Creating a data frame with yearly aggregates
year_df <- df %>% mutate(Year= year(Date)) %>% select (-Date) %>% relocate(Year) %>% group_by(Year) %>% summarise_all(list(sum))

#Finding the name of the country which is maximum of each year
year_df$max = names(year_df)[apply(year_df, 1, which.max)]
year_df %>% select(Year,max)
## # A tibble: 13 x 2
##     Year max    
##    <dbl> <chr>  
##  1  2008 Germany
##  2  2009 Germany
##  3  2010 Germany
##  4  2011 Germany
##  5  2012 Germany
##  6  2013 Germany
##  7  2014 Germany
##  8  2015 Germany
##  9  2016 Germany
## 10  2017 Russia 
## 11  2018 Russia 
## 12  2019 Russia 
## 13  2020 Germany
month_df <- df %>% mutate(Month= month(Date)) %>% select (-Date) %>% relocate(Month) %>% group_by(Month) %>% summarise_all(list(sum))

#Finding the name of the country which is maximum of each year
month_df$max = names(month_df)[apply(month_df, 1, which.max)]
month_df %>% select(Month,max)
## # A tibble: 12 x 2
##    Month max    
##    <dbl> <chr>  
##  1     1 Germany
##  2     2 Germany
##  3     3 Germany
##  4     4 Germany
##  5     5 Russia 
##  6     6 Russia 
##  7     7 Russia 
##  8     8 Germany
##  9     9 Germany
## 10    10 Germany
## 11    11 Germany
## 12    12 Germany

Let’s see which continents the tourists came from the most.

total_df <- raw_df %>% select("Date" | contains("Total"))

total_df %>% pivot_longer(.,-Date) %>% ggplot(.,aes(x=Date,y=value,color=name)) + geom_line()

total_df %>% mutate(Year = year(Date)) %>% select(-Date) %>% group_by(Year) %>% summarise_all(list(mean)) %>% 
  pivot_longer(.,-Year) %>% ggplot(.,aes(x=Year,y=value,color=name)) + geom_line()

It’s worth checking the months in which the most tourists visited Turkey.

monthly_sum <- raw_df %>% mutate(month=month(Date)) %>% select(`Grand Total`,month,-Date) %>% relocate(month) %>% group_by(month) %>% summarise(sum=sum(`Grand Total`))

ggplot(monthly_sum) + geom_col(aes(x=month,y=sum)) + labs(title = "Visitors by Month", x="Month", y="Number of visitors") + theme_minimal() 

#Add names of the months here

3.2 Seasonality Analysis

We can explore the effect of seasonality in the following way:

ts <- ts(total_df$`Grand Total`, frequency = 12, start = 2008) 
decomposed <- decompose(ts, type="additive")
plot(decomposed)

The decomposed plot shows us there is an increased trend, however in 2016, there is a significant drop in the number of tourists, which can be attributed to the coup attempt during that summer.

This is the original plot of the data:

total_df %>% ggplot(aes(x=Date,y=`Grand Total`)) +geom_line() + theme_minimal() #put labels and stuff

This is the deseasonalized version of the data:

ts.stl <- stl(ts,"periodic") 
ts.sa <- seasadj(ts.stl) 

total_df %>% ggplot(aes(x=Date, y= ts.sa)) +geom_line() + theme_minimal() #put labels and stuff

seasonplot(ts.sa, 12, col=rainbow(13), season.labels=TRUE, year.labels=TRUE, main="Seasonal Visitors") #Make these clearer?

We can see that the tourist data in 2020 is very different than the rest and downwards trending due to Covid-19.

3.3 Relation Between Number of Visitors and Terrorism in Turkey

Reading the terrorism data:

ter<-read.csv("globalterrorism_turkey.csv")

Preporocessing the terrorism data:

terrorism_turkey<-ter%>%filter(country_txt=="Turkey")%>%filter(iyear>=2008)
terrorism_turkey$natlty1_txt=NULL
colnames(terrorism_turkey)[6]  <- "City"
colnames(terrorism_turkey)[7]  <- "County"

Terrorism events are grouped by years and months in order to be examined.

number_of_events_year<-terrorism_turkey%>%group_by(iyear)%>%count()
number_of_events_month<-terrorism_turkey%>%group_by(imonth,iyear)%>%count()
colnames(number_of_events_year)[2]  <- "total"
colnames(number_of_events_year)[1]  <- "Year"
number_of_events_year$Year<- as.Date(as.character(number_of_events_year$Year), format = "%Y")

Touristic data is arranged.

data_tourist_year<-data_tourist%>%pivot_longer(cols=-Date,names_to="Country", values_to="Visits")%>%group_by(Country,year(Date))%>%summarise(Total_year=sum(Visits))
colnames(data_tourist_year)[2]  <- "Year"
colnames(data_tourist_year)[3]  <- "Visits"

Terrorism data is analyzed city by city.

terrorismBycity<-terrorism_turkey%>%group_by(City, iyear)%>%count()
colnames(terrorismBycity)[2]  <- "Year"


yearly_total_visits<-data_tourist_year%>%group_by(Year)%>%summarize(total=sum(Visits))
yearly_total_visits$Year<- as.Date(as.character(yearly_total_visits$Year), format = "%Y")
class(yearly_total_visits$Year)
## [1] "Date"

The plot with two scales represent the correlation between the number of tourists and terrorism statistics.

ggplot() + 
  geom_bar(mapping = aes(x = yearly_total_visits$Year, y = yearly_total_visits$total), stat = "identity", fill = "black") +
  geom_line(mapping = aes(x = number_of_events_year$Year, y = number_of_events_year$total*100000), size = 2, color = "blue") + 
  scale_x_date(name = "Years") +
  scale_y_continuous(name = "number of tourists", 
                     sec.axis = sec_axis(~./100000, name = "terrorism", 
                                         labels = function(b) { paste0(b)})) + 
  theme(
    axis.title.y = element_text(color = "black"),
    axis.title.y.right = element_text(color = "blue"))

3.4 Relation Between Number of Visitors and Development Level of Their Countries

We used multiple development indicators to analyze number of visitors with respect to the development levels of their countries.

Indicators used:
* GDP per capita
* Life expectancy
* Human Development Index

Importing main data:

raw_data <- read_xlsx("milliyetlere_gore_ziyaretci_sayisi.xlsx")

raw_data <- raw_data %>%
   mutate(Date=as.yearmon(Date))

Creating yearly sums of visitors:
We also took yearly average number of visitors between 2008 and 2018.

years_total <- raw_data %>%
  t()
colnames(years_total)=years_total[1,]
years_total <- years_total[-1,]

years_total <- as.data.frame(years_total)
years_total <- rownames_to_column(years_total, "Country Name")

years_total[,2:151] <- as.numeric(unlist(years_total[,2:151]))

years_total<- years_total %>%
  rowwise() %>%
  mutate( sum_2008=sum(c_across(`Jan 2008`:`Dec 2008`)),
          sum_2009=sum(c_across(`Jan 2009`:`Dec 2009`)),
          sum_2010=sum(c_across(`Jan 2010`:`Dec 2010`)),
          sum_2011=sum(c_across(`Jan 2011`:`Dec 2011`)),
          sum_2012=sum(c_across(`Jan 2012`:`Dec 2012`)),
          sum_2013=sum(c_across(`Jan 2013`:`Dec 2013`)),
          sum_2014=sum(c_across(`Jan 2014`:`Dec 2014`)),
          sum_2015=sum(c_across(`Jan 2015`:`Dec 2015`)),
          sum_2016=sum(c_across(`Jan 2016`:`Dec 2016`)),
          sum_2017=sum(c_across(`Jan 2017`:`Dec 2017`)),
          sum_2018=sum(c_across(`Jan 2018`:`Dec 2018`)),
          sum_2019=sum(c_across(`Jan 2019`:`Dec 2019`)),
          avg_2008to2018_yearly=mean(c_across(`Jan 2008`:`Dec 2018`))*12
  ) %>%
  select('Country Name', sum_2008:sum_2019, avg_2008to2018_yearly)

Importing GDP per capita data:
Our visitors data includes only the years 2008 to 2020, so we will need GDP per capita for only these years. Also we don’t have GDP per capita data for 2020, that’s why we took only from 2008 to 2019.

GDP_per_capita_data <- read_xls("GDP_per_capita.xls")
GDP_per_capita_data %>%
  glimpse()
## Rows: 264
## Columns: 64
## $ `Country Name`   <chr> "Aruba", "Afghanistan", "Angola", "Albania", "Ando...
## $ `Country Code`   <chr> "ABW", "AFG", "AGO", "ALB", "AND", "ARB", "ARE", "...
## $ `Indicator Name` <chr> "GDP per capita (current US$)", "GDP per capita (c...
## $ `Indicator Code` <chr> "NY.GDP.PCAP.CD", "NY.GDP.PCAP.CD", "NY.GDP.PCAP.C...
## $ `1960`           <dbl> NA, 59.77319, NA, NA, NA, NA, NA, NA, NA, NA, NA, ...
## $ `1961`           <dbl> NA, 59.86087, NA, NA, NA, NA, NA, NA, NA, NA, NA, ...
## $ `1962`           <dbl> NA, 58.45801, NA, NA, NA, NA, NA, 1155.89017, NA, ...
## $ `1963`           <dbl> NA, 78.70639, NA, NA, NA, NA, NA, 850.30474, NA, N...
## $ `1964`           <dbl> NA, 82.09523, NA, NA, NA, NA, NA, 1173.23821, NA, ...
## $ `1965`           <dbl> NA, 101.10830, NA, NA, NA, NA, NA, 1279.11343, NA,...
## $ `1966`           <dbl> NA, 137.59435, NA, NA, NA, NA, NA, 1272.80298, NA,...
## $ `1967`           <dbl> NA, 160.89859, NA, NA, NA, NA, NA, 1062.54355, NA,...
## $ `1968`           <dbl> NA, 129.10832, NA, NA, NA, 224.87868, NA, 1141.080...
## $ `1969`           <dbl> NA, 129.32971, NA, NA, NA, 240.03624, NA, 1329.058...
## $ `1970`           <dbl> NA, 156.5189, NA, NA, 3238.5568, 262.8674, NA, 132...
## $ `1971`           <dbl> NA, 159.56758, NA, NA, 3498.17365, 295.97173, NA, ...
## $ `1972`           <dbl> NA, 135.31731, NA, NA, 4217.17358, 343.56731, NA, ...
## $ `1973`           <dbl> NA, 143.14465, NA, NA, 5342.16856, 423.13549, NA, ...
## $ `1974`           <dbl> NA, 173.65376, NA, NA, 6319.73903, 777.56257, NA, ...
## $ `1975`           <dbl> NA, 186.5109, NA, NA, 7169.1010, 836.2108, 26847.7...
## $ `1976`           <dbl> NA, 197.4455, NA, NA, 7152.3751, 1007.1433, 30118....
## $ `1977`           <dbl> NA, 224.2248, NA, NA, 7751.3702, 1123.1456, 33823....
## $ `1978`           <dbl> NA, 247.3541, NA, NA, 9129.7062, 1193.7481, 28456....
## $ `1979`           <dbl> NA, 275.7382, NA, NA, 11820.8494, 1563.7073, 33512...
## $ `1980`           <dbl> NA, 272.6553, 710.9816, NA, 12377.4116, 2052.9615,...
## $ `1981`           <dbl> NA, 264.1113, 642.3839, NA, 10372.2328, 2050.7747,...
## $ `1982`           <dbl> NA, NA, 619.9614, NA, 9610.2663, 1864.8757, 40026....
## $ `1983`           <dbl> NA, NA, 623.4406, NA, 8022.6548, 1699.2197, 34843....
## $ `1984`           <dbl> NA, NA, 637.7152, 639.4847, 7728.9067, 1672.2829, ...
## $ `1985`           <dbl> NA, NA, 758.2376, 639.8659, 7774.3938, 1606.7598, ...
## $ `1986`           <dbl> 6472.5020, NA, 685.2701, 693.8735, 10361.8160, 148...
## $ `1987`           <dbl> 7885.7965, NA, 756.2619, 674.7934, 12616.1676, 154...
## $ `1988`           <dbl> 9764.7900, NA, 792.3031, 652.7743, 14304.3570, 147...
## $ `1989`           <dbl> 11392.4558, NA, 890.5541, 697.9956, 15166.4379, 15...
## $ `1990`           <dbl> 12307.3117, NA, 947.7042, 617.2304, 18878.5060, 20...
## $ `1991`           <dbl> 13496.0031, NA, 865.6927, 336.5870, 19532.5402, 19...
## $ `1992`           <dbl> 14046.5038, NA, 656.3618, 200.8522, 20547.7118, 20...
## $ `1993`           <dbl> 14936.8272, NA, 441.2007, 367.2792, 16516.4710, 19...
## $ `1994`           <dbl> 16241.0465, NA, 328.6733, 586.4163, 16234.8090, 19...
## $ `1995`           <dbl> 16439.3564, NA, 397.1795, 750.6044, 18461.0649, 20...
## $ `1996`           <dbl> 16586.0684, NA, 522.6438, 1009.9777, 19017.1746, 2...
## $ `1997`           <dbl> 17927.7496, NA, 514.2952, 717.3806, 18353.0597, 23...
## $ `1998`           <dbl> 19078.3432, NA, 423.5937, 813.7903, 18894.5215, 21...
## $ `1999`           <dbl> 19356.2034, NA, 387.7843, 1033.2417, 19261.7105, 2...
## $ `2000`           <dbl> 20620.7006, NA, 556.8363, 1126.6833, 21854.2468, 2...
## $ `2001`           <dbl> 20669.0320, NA, 527.3335, 1281.6594, 22971.5355, 2...
## $ `2002`           <dbl> 20436.8871, 179.4266, 872.4945, 1425.1248, 25066.8...
## $ `2003`           <dbl> 20833.7616, 190.6838, 982.9609, 1846.1188, 32271.9...
## $ `2004`           <dbl> 22569.9750, 211.3821, 1255.5640, 2373.5798, 37969....
## $ `2005`           <dbl> 23300.0396, 242.0313, 1902.4223, 2673.7873, 40066....
## $ `2006`           <dbl> 24045.2725, 263.7337, 2599.5665, 2972.7433, 42675....
## $ `2007`           <dbl> 25835.1327, 359.6932, 3121.9956, 3595.0372, 47803....
## $ `2008`           <dbl> 27084.7037, 364.6607, 4080.9414, 4370.5401, 48718....
## $ `2009`           <dbl> 24630.4537, 438.0760, 3122.7808, 4114.1401, 43503....
## $ `2010`           <dbl> 23512.6026, 543.3030, 3587.8838, 4094.3503, 40852....
## $ `2011`           <dbl> 24985.9933, 591.1628, 4615.4680, 4437.1429, 43335....
## $ `2012`           <dbl> 24713.6980, 641.8715, 5100.0958, 4247.6300, 38686....
## $ `2013`           <dbl> 26189.4355, 637.1655, 5254.8823, 4413.0609, 39538....
## $ `2014`           <dbl> 26647.9381, 613.8567, 5408.4105, 4578.6320, 41303....
## $ `2015`           <dbl> 27980.8807, 578.4664, 4166.9797, 3952.8012, 35762....
## $ `2016`           <dbl> 28281.3505, 547.2281, 3506.0729, 4124.0557, 37474....
## $ `2017`           <dbl> 29007.6930, 556.3020, 4095.8129, 4531.0208, 38962....
## $ `2018`           <dbl> NA, 524.1629, 3289.6467, 5284.3802, 41793.0553, 66...
## $ `2019`           <dbl> NA, 502.1155, 2973.5912, 5352.8574, 40886.3912, 65...
GDP_per_capita_data <- GDP_per_capita_data %>%
  select(`Country Name`,`2008`:`2019`) 

GDP_per_capita_data <- as.data.frame(GDP_per_capita_data)

Joining the data:

colnames(GDP_per_capita_data)[2:13] <- paste0("GDP_per_capita_", colnames(GDP_per_capita_data)[2:13])

joined_data_GDP_per_capita <- years_total %>%
  inner_join(GDP_per_capita_data, by=c(`Country Name`= 'Country Name' )) %>%
  mutate(avg_GDPperCap_2008to2018=mean(c_across(GDP_per_capita_2008:GDP_per_capita_2018), na.rm = TRUE))

We divided countries into 4 categories using average GDP per capita quantiles. Then we plotted average GDP per capita with respect to average yearly visitors, colored by GDP per capita level. Also we took sum of yearly average visitors within the groups and plotted a pie chart, in order to see the breakdown of development level of countries of Turkey’s visitors.

q1 <- quantile(joined_data_GDP_per_capita$avg_GDPperCap_2008to2018, na.rm = TRUE)

joined_data_GDP_per_capita <- joined_data_GDP_per_capita %>%
  mutate(GDP_per_capita_level=case_when((avg_GDPperCap_2008to2018>=q1[1]&avg_GDPperCap_2008to2018<q1[2])~"very low",
                                        (avg_GDPperCap_2008to2018>=q1[2]&avg_GDPperCap_2008to2018<q1[3])~"low",
                                        (avg_GDPperCap_2008to2018>=q1[3]&avg_GDPperCap_2008to2018<q1[4])~"medium",
                                        (avg_GDPperCap_2008to2018>=q1[4]&avg_GDPperCap_2008to2018<=q1[5])~"high" 
                                        )
         )


joined_data_GDP_per_capita %>%
  ggplot(aes(x=avg_2008to2018_yearly,y=avg_GDPperCap_2008to2018, color=GDP_per_capita_level),fig.align='center')+
  geom_point()

GDP_per_capita_level_grouped <- joined_data_GDP_per_capita %>%
  group_by(GDP_per_capita_level) %>%
  summarise(sum=sum(avg_2008to2018_yearly))

ggplot(GDP_per_capita_level_grouped, aes(x="",y=sum,fill=GDP_per_capita_level))+geom_bar(stat="identity",width=1)+coord_polar("y")+labs(x="",y="sum")

Top 5 countries whose citizens visited Turkey the most belong either high or low GDP per capita levels. It is worth to mention that people whose countries belong to the medium category do not visit Turkey much, unlike high and low categories.

Importing life expectancy data of countries:

life_expectancy_data <- read_xls("Life_expectancy.xls")

life_expectancy_data %>%
  glimpse()
## Rows: 264
## Columns: 64
## $ `Country Name`   <chr> "Aruba", "Afghanistan", "Angola", "Albania", "Ando...
## $ `Country Code`   <chr> "ABW", "AFG", "AGO", "ALB", "AND", "ARB", "ARE", "...
## $ `Indicator Name` <chr> "Life expectancy at birth, total (years)", "Life e...
## $ `Indicator Code` <chr> "SP.DYN.LE00.IN", "SP.DYN.LE00.IN", "SP.DYN.LE00.I...
## $ `1960`           <dbl> 65.66200, 32.44600, 37.52400, 62.28300, NA, 46.546...
## $ `1961`           <dbl> 66.07400, 32.96200, 37.81100, 63.30100, NA, 47.141...
## $ `1962`           <dbl> 66.44400, 33.47100, 38.11300, 64.19000, NA, 47.731...
## $ `1963`           <dbl> 66.78700, 33.97100, 38.43000, 64.91400, NA, 48.320...
## $ `1964`           <dbl> 67.11300, 34.46300, 38.76000, 65.46300, NA, 48.910...
## $ `1965`           <dbl> 67.43500, 34.94800, 39.10200, 65.85000, NA, 49.496...
## $ `1966`           <dbl> 67.76200, 35.43000, 39.45400, 66.11000, NA, 50.072...
## $ `1967`           <dbl> 68.09500, 35.91400, 39.81300, 66.30400, NA, 50.634...
## $ `1968`           <dbl> 68.43600, 36.40300, 40.17800, 66.48700, NA, 51.182...
## $ `1969`           <dbl> 68.78400, 36.90000, 40.54600, 66.68900, NA, 51.722...
## $ `1970`           <dbl> 69.14000, 37.40900, 40.91400, 66.93500, NA, 52.264...
## $ `1971`           <dbl> 69.49800, 37.93000, 41.28200, 67.23700, NA, 52.822...
## $ `1972`           <dbl> 69.85100, 38.46100, 41.65000, 67.58200, NA, 53.406...
## $ `1973`           <dbl> 70.19100, 39.00300, 42.01600, 67.95300, NA, 54.015...
## $ `1974`           <dbl> 70.51900, 39.55800, 42.37400, 68.34300, NA, 54.642...
## $ `1975`           <dbl> 70.83300, 40.12800, 42.72100, 68.73600, NA, 55.277...
## $ `1976`           <dbl> 71.14000, 40.71500, 43.05300, 69.11000, NA, 55.909...
## $ `1977`           <dbl> 71.44100, 41.32000, 43.36700, 69.44800, NA, 56.528...
## $ `1978`           <dbl> 71.73600, 41.94400, 43.66000, 69.74200, NA, 57.133...
## $ `1979`           <dbl> 72.02300, 42.58500, 43.93100, 69.99100, NA, 57.734...
## $ `1980`           <dbl> 72.29300, 43.24400, 44.17800, 70.20800, NA, 58.345...
## $ `1981`           <dbl> 72.53800, 43.92300, 44.40400, 70.41600, NA, 58.977...
## $ `1982`           <dbl> 72.75100, 44.61700, 44.61100, 70.63500, NA, 59.636...
## $ `1983`           <dbl> 72.92900, 45.32400, 44.79900, 70.87600, NA, 60.316...
## $ `1984`           <dbl> 73.07100, 46.04000, 44.96600, 71.13400, NA, 61.003...
## $ `1985`           <dbl> 73.18100, 46.76100, 45.10700, 71.38800, NA, 61.676...
## $ `1986`           <dbl> 73.26200, 47.48600, 45.21300, 71.60500, NA, 62.314...
## $ `1987`           <dbl> 73.32500, 48.21100, 45.28300, 71.76000, NA, 62.901...
## $ `1988`           <dbl> 73.37800, 48.93000, 45.31700, 71.84300, NA, 63.429...
## $ `1989`           <dbl> 73.42500, 49.64000, 45.32400, 71.86000, NA, 63.898...
## $ `1990`           <dbl> 73.46800, 50.33100, 45.30600, 71.83600, NA, 64.351...
## $ `1991`           <dbl> 73.50900, 50.99900, 45.27100, 71.80300, NA, 64.736...
## $ `1992`           <dbl> 73.54400, 51.64100, 45.23000, 71.80200, NA, 65.043...
## $ `1993`           <dbl> 73.57300, 52.25600, 45.20100, 71.86000, NA, 65.414...
## $ `1994`           <dbl> 73.59800, 52.84200, 45.20100, 71.99200, NA, 65.785...
## $ `1995`           <dbl> 73.62200, 53.39800, 45.24600, 72.20500, NA, 66.196...
## $ `1996`           <dbl> 73.64600, 53.92400, 45.35000, 72.49500, NA, 66.554...
## $ `1997`           <dbl> 73.67100, 54.42400, 45.51900, 72.83800, NA, 66.897...
## $ `1998`           <dbl> 73.70000, 54.90600, 45.76300, 73.20800, NA, 67.218...
## $ `1999`           <dbl> 73.73800, 55.37600, 46.09300, 73.58700, NA, 67.517...
## $ `2000`           <dbl> 73.78700, 55.84100, 46.52200, 73.95500, NA, 67.800...
## $ `2001`           <dbl> 73.85300, 56.30800, 47.05900, 74.28800, NA, 68.072...
## $ `2002`           <dbl> 73.93700, 56.78400, 47.70200, 74.57900, NA, 68.340...
## $ `2003`           <dbl> 74.03800, 57.27100, 48.44000, 74.82800, NA, 68.610...
## $ `2004`           <dbl> 74.15600, 57.77200, 49.26300, 75.03900, NA, 68.886...
## $ `2005`           <dbl> 74.28700, 58.29000, 50.16500, 75.22800, NA, 69.164...
## $ `2006`           <dbl> 74.42900, 58.82600, 51.14300, 75.42300, NA, 69.439...
## $ `2007`           <dbl> 74.57600, 59.37500, 52.17700, 75.64600, NA, 69.701...
## $ `2008`           <dbl> 74.72500, 59.93000, 53.24300, 75.91200, NA, 69.942...
## $ `2009`           <dbl> 74.87200, 60.48400, 54.31100, 76.22100, NA, 70.157...
## $ `2010`           <dbl> 75.01700, 61.02800, 55.35000, 76.56200, NA, 70.349...
## $ `2011`           <dbl> 75.15800, 61.55300, 56.33000, 76.91400, NA, 70.528...
## $ `2012`           <dbl> 75.29900, 62.05400, 57.23600, 77.25200, NA, 70.703...
## $ `2013`           <dbl> 75.44100, 62.52500, 58.05400, 77.55400, NA, 70.882...
## $ `2014`           <dbl> 75.58300, 62.96600, 58.77600, 77.81300, NA, 71.064...
## $ `2015`           <dbl> 75.72500, 63.37700, 59.39800, 78.02500, NA, 71.249...
## $ `2016`           <dbl> 75.86800, 63.76300, 59.92500, 78.19400, NA, 71.436...
## $ `2017`           <dbl> 76.01000, 64.13000, 60.37900, 78.33300, NA, 71.622...
## $ `2018`           <dbl> 76.15200, 64.48600, 60.78200, 78.45800, NA, 71.806...
## $ `2019`           <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
life_expectancy_data <- life_expectancy_data %>%
  select(`Country Name`,`2008`:`2018`)

life_expectancy_data <- as.data.frame(life_expectancy_data)

Joining the data:

colnames(life_expectancy_data)[2:12] <- paste0("life_expectancy_", colnames(life_expectancy_data)[2:12])

joined_data_life_expectancy <- years_total %>%
  inner_join(life_expectancy_data, by=c('Country Name'='Country Name')) %>%
  mutate(avg_life_expectancy_2008to2018=mean(c_across(life_expectancy_2008:life_expectancy_2018)))

We divided countries into 4 categories using average life expectancy quantiles. Then we plotted average life expectancy with respect to average yearly visitors, colored by life expectancy level of countries. We also took sum of yearly average visitors within the groups and plotted a pie chart,in order to see the breakdown of development level of countries of Turkey’s visitors.

q2 <- quantile(joined_data_life_expectancy$avg_life_expectancy_2008to2018, na.rm = TRUE)

joined_data_life_expectancy <- joined_data_life_expectancy %>%
  mutate(life_expectancy_level=case_when((avg_life_expectancy_2008to2018>=q2[1]&avg_life_expectancy_2008to2018<q2[2])~"very low",
                                        (avg_life_expectancy_2008to2018>=q2[2]&avg_life_expectancy_2008to2018<q2[3])~"low",
                                        (avg_life_expectancy_2008to2018>=q2[3]&avg_life_expectancy_2008to2018<q2[4])~"medium",
                                        (avg_life_expectancy_2008to2018>=q2[4]&avg_life_expectancy_2008to2018<=q2[5])~"high" 
  )
  )


joined_data_life_expectancy %>%
  ggplot(aes(x=avg_2008to2018_yearly,y=avg_life_expectancy_2008to2018, color=life_expectancy_level)) +
  geom_point()

life_expectancy_level_grouped <- joined_data_life_expectancy %>%
  group_by(life_expectancy_level) %>%
  summarise(sum=sum(avg_2008to2018_yearly))

ggplot(life_expectancy_level_grouped, aes(x="",y=sum,fill=life_expectancy_level))+geom_bar(stat="identity",width=1)+coord_polar("y")+labs(x="",y="sum")

Unlike the pie chart for GDP per capita, life expectancy pie chart is not far from a equally divided pie.

Importing Human Development Index (HDI) data:

HDI_data <- read_xlsx("Human_development_index_HDI.xlsx")

HDI_data %>%
  glimpse()
## Rows: 212
## Columns: 30
## $ Country <chr> "Afghanistan", "Albania", "Algeria", "Andorra", "Angola", "...
## $ `1990`  <chr> "0.29799999999999999", "0.64400000000000002", "0.5779999999...
## $ `1991`  <chr> "0.30399999999999999", "0.625", "0.58199999999999996", "N/A...
## $ `1992`  <chr> "0.312", "0.60799999999999998", "0.58899999999999997", "N/A...
## $ `1993`  <chr> "0.308", "0.61099999999999999", "0.59299999999999997", "N/A...
## $ `1994`  <chr> "0.30299999999999999", "0.61699999999999999", "0.5969999999...
## $ `1995`  <chr> "0.32700000000000001", "0.629", "0.60199999999999998", "N/A...
## $ `1996`  <chr> "0.33100000000000002", "0.63900000000000001", "0.61", "N/A"...
## $ `1997`  <chr> "0.33500000000000002", "0.63900000000000001", "0.6189999999...
## $ `1998`  <chr> "0.33900000000000002", "0.64900000000000002", "0.629", "N/A...
## $ `1999`  <chr> "0.34300000000000003", "0.66", "0.63800000000000001", "N/A"...
## $ `2000`  <chr> "0.34499999999999997", "0.66700000000000004", "0.6460000000...
## $ `2001`  <chr> "0.34699999999999998", "0.67300000000000004", "0.6550000000...
## $ `2002`  <chr> "0.378", "0.68", "0.66600000000000004", "0.78", "0.41899999...
## $ `2003`  <chr> "0.38700000000000001", "0.68700000000000006", "0.6760000000...
## $ `2004`  <chr> "0.4", "0.69199999999999995", "0.68500000000000005", "0.825...
## $ `2005`  <chr> "0.41", "0.70199999999999996", "0.69399999999999995", "0.81...
## $ `2006`  <chr> "0.41899999999999998", "0.70899999999999996", "0.6989999999...
## $ `2007`  <chr> "0.43099999999999999", "0.71799999999999997", "0.7079999999...
## $ `2008`  <chr> "0.436", "0.72399999999999998", "0.71099999999999997", "0.8...
## $ `2009`  <chr> "0.44700000000000001", "0.72899999999999998", "0.72", "0.83...
## $ `2010`  <chr> "0.46400000000000002", "0.74", "0.73", "0.82799999999999996...
## $ `2011`  <chr> "0.46500000000000002", "0.75900000000000001", "0.7379999999...
## $ `2012`  <chr> "0.47899999999999998", "0.77100000000000002", "0.7369999999...
## $ `2013`  <chr> "0.48499999999999999", "0.78100000000000003", "0.746", "0.8...
## $ `2014`  <chr> "0.48799999999999999", "0.78700000000000003", "0.749", "0.8...
## $ `2015`  <chr> "0.49", "0.78800000000000003", "0.751", "0.85", "0.56499999...
## $ `2016`  <chr> "0.49099999999999999", "0.78800000000000003", "0.755", "0.8...
## $ `2017`  <chr> "0.49299999999999999", "0.78900000000000003", "0.7580000000...
## $ `2018`  <chr> "0.496", "0.79100000000000004", "0.75900000000000001", "0.8...
colnames(HDI_data) <- gsub("X","",colnames(HDI_data))

HDI_data <- HDI_data %>%
  select('Country Name'= Country, '2008':'2018')

HDI_data <- as.data.frame(HDI_data)

Joining and plotting the data:
We also classified countries by their HDI into 4 categories: “very high”, “high”, “medium” and “low” (source: https://en.wikipedia.org/wiki/Developed_country )

colnames(HDI_data)[2:12] <- paste0("HDI_", colnames(HDI_data)[2:12])

HDI_data[,2:12] <- as.numeric(unlist(HDI_data[,2:12]))

joined_data_HDI <- years_total %>%
  inner_join(HDI_data, by=c('Country Name'='Country Name')) %>%
  mutate(avg_HDI_2008to2018=mean(c_across(HDI_2008:HDI_2018),na.rm = TRUE)) %>%
  mutate(HDI_level=case_when(avg_HDI_2008to2018>=0.8~"very high",
                             (avg_HDI_2008to2018<0.8&avg_HDI_2008to2018>=0.7)~"high",
                             (avg_HDI_2008to2018<0.7&avg_HDI_2008to2018>=0.55)~"medium",
                             avg_HDI_2008to2018<0.55~"low"
                             )
         )

joined_data_HDI %>%
  ggplot(aes(x=avg_2008to2018_yearly,y=avg_HDI_2008to2018, color=HDI_level)) +
  geom_point()

HDI_level_grouped <- joined_data_HDI %>%
  group_by(HDI_level) %>%
  summarise(sum=sum(avg_2008to2018_yearly))

ggplot(HDI_level_grouped, aes(x="",y=sum,fill=HDI_level))+geom_bar(stat="identity",width=1)+coord_polar("y")+labs(x="",y="sum")

Besides given levels, we also wanted to analyze levels by quantile, like we did with other metrics.

q3 <- quantile(joined_data_HDI$avg_HDI_2008to2018, na.rm = TRUE)

joined_data_HDI <- joined_data_HDI %>%
  mutate(HDI_level_by_quantile=case_when((avg_HDI_2008to2018 >=q3[1]&avg_HDI_2008to2018<q3[2])~"very low",
                                         (avg_HDI_2008to2018>=q3[2]&avg_HDI_2008to2018<q3[3])~"low",
                                         (avg_HDI_2008to2018>=q3[3]&avg_HDI_2008to2018<q3[4])~"medium",
                                         (avg_HDI_2008to2018>=q3[4]&avg_HDI_2008to2018<=q3[5])~"high" 
  )
  )

HDI_level_by_quantile_grouped <- joined_data_HDI %>%
  group_by(HDI_level_by_quantile) %>%
  summarise(sum=sum(avg_2008to2018_yearly))

joined_data_HDI %>%
  ggplot(aes(x=avg_2008to2018_yearly,y=avg_HDI_2008to2018, color=HDI_level_by_quantile)) +
  geom_point()

ggplot(HDI_level_by_quantile_grouped, aes(x="",y=sum,fill=HDI_level_by_quantile))+geom_bar(stat="identity",width=1)+coord_polar("y")+labs(x="",y="sum")

3.5 Visa Policy of Turkey

visa<-read_excel("vize_muafiyeti.xlsx")

pivot_longer() function is used to transpose the data.

data_transpose<-data%>%pivot_longer(cols=-Date,names_to="Free Visa")

data_year contains the total number of visits with respect to years.

data_year<-data_transpose%>%group_by(year(Date), `Free Visa`)%>%summarise(total=sum(value))

data_countries vector states the names of the regions and countries.

data_countries<-data%>%select(-Date)%>%names()

In order to find the intersection between the countries which don’t need visa for entering Turkey and and the original data frame, inner_join() function is used. Therefore, a new column that shows the need for visa is created. “1” is assigned to these countries, which means that they are allowed to enter the country without visa.

free_visa_df<- data_transpose%>%inner_join(visa,by="Free Visa")
free_visa_df[,4]<-1
colnames(free_visa_df)[4]  <- "Visa"
colnames(free_visa_df)[2]  <- "Country"

The other countries, which require to have visa for visits in Turkey, are assigned to “0”. Some column names are arranged for clarification.

no_visa_df<-data_transpose%>%anti_join(visa,by="Free Visa")
no_visa_df[,4]<-0
colnames(no_visa_df)[4]  <- "Visa"
colnames(no_visa_df)[2]  <- "Country"

Then, two data frame free_visa_df and no_visa_df is merged and total_data is obtained. Now, the need for visa is clearly shown in single data frame.

#1 means free visa
#0 means that visa is needed

total_data<- free_visa_df%>%full_join(no_visa_df,by=c("Visa","Date","Country","value"))

sum_data represents the number of visits according to countries and years as well as the visa information. sum_byvisa data frame examines the effect of visa requirent closely. It summarizes all the countries and years in terms of visa requirement by calculating the mean and sum of visits. Thus, even though many country can enter Turkey without visa, the others visits much more than the visa-free ones. So, the need for visa doesn’t have a significant effect on the number of visits to Turkey.

sum_data<-total_data%>%group_by(Country,Visa, year(Date))%>%summarise(total=sum(value))
sum_byvisa<-total_data%>%group_by(Visa)%>%summarise(mean=mean(value),total=sum(value))

The continent and region names are removed from the data because they don’t contain any visa information. And, the visit data is ranked in descending order.

ranked_final<-sum_data%>%arrange(desc(total))  
ranked_final<-ranked_final[!grepl("Total|Africa|Other", ranked_final$Country),]
head(ranked_final,10)
## # A tibble: 10 x 4
## # Groups:   Country, Visa [2]
##    Country  Visa `year(Date)`   total
##    <chr>   <dbl>        <dbl>   <dbl>
##  1 Russia      1         2019 7000407
##  2 Russia      1         2018 5937850
##  3 Germany     1         2015 5593065
##  4 Germany     1         2014 5251870
##  5 Germany     1         2013 5048199
##  6 Germany     1         2012 5025660
##  7 Germany     1         2019 5024193
##  8 Germany     1         2011 4815156
##  9 Russia      1         2017 4699557
## 10 Germany     1         2018 4496173
tail(ranked_final,10)
## # A tibble: 10 x 4
## # Groups:   Country, Visa [4]
##    Country     Visa `year(Date)` total
##    <chr>      <dbl>        <dbl> <dbl>
##  1 Oceania        0         2011   474
##  2 Oceania        0         2013   446
##  3 Oceania        0         2017   438
##  4 Oceania        0         2015   360
##  5 Oceania        0         2020   335
##  6 Oceania        0         2008   325
##  7 Iceland        1         2020   318
##  8 Oceania        0         2009   209
##  9 Kosovo         1         2008     0
## 10 Montenegro     1         2008     0

In the final plot, the points show the number of total visits to Turkey, country by country. Red points state that this country on the x-axis is not allowed to enter Turkey without visa. Blue dots represent that free-visa countries, which are free to enter the country without visa.

ggplot(ranked_final,aes(x=Country,y=total,color=factor(Visa)))+
  geom_point()+
  scale_y_continuous(labels = unit_format(unit = "M", scale = 1e-6))+
  theme(axis.text.x = element_text(angle = 80, vjust = .5)) 

3.6 Analysis of Exchange Rates and Visitors

In this part, an analysis of change of exchange rates ,and whether it has salient effects on the number of tourists is conducted. For that purpose, “Euro/Turkish Lira and Dollar/Turkish Lira exchange rates” and “total visitors from Europe and USA” between 2008 and 2020 are used as variables. .

Related data is gathered from the sources and data preparation is conducted through cleaning, filtering and reframing the data. Below, you can see the coding part by clicking the code box on the right side.

mil <- read_xlsx("milliyetlere_gore_ziyaretci_sayisi.xlsx")

mil <- mil %>%
  mutate(Date=as.yearmon(Date)) %>% mutate(Date=as.Date(Date))

euro_dolar<- read_csv("euro-dollar.csv",guess_max = 100,
                      col_types = cols(
                        "Tarih" = col_date(format="%Y-%m"),
                        "TP DK EUR A YTL" = col_number(),
                        "TP DK USD A YTL" = col_number()
                      ))
#NA values stems from the source information etc


#rename

names(euro_dolar)=c("Date", "euro_tl_exchange_rate","dollar_tl_exchange_rate") 

#getting related columns

euro_dolar_tidy<-euro_dolar %>%
  as.data.frame %>%
  slice(1:150) 

#preparing data for visualization

euro_dolar_2<-euro_dolar_tidy%>%
  pivot_longer(!Date,names_to="exchange_type",values_to="rate")

3.6.1 Exchange Rate Analysis

The change in the exchange rates of both EURO and DOLLAR is examined from the first month of 2008 untill sixth month of 2020. For that, first data preproccesing is done, and secondly, data is analyzed and visualized for a better understanding with the help of dplyr and ggplot2 packages.

#ploting line graph to see exchange rate by time

ggplot<-ggplot(euro_dolar_2,aes(Date,rate,color=exchange_type))+ 
  geom_line(size=1)+
  labs(x="Years", y="Exchange rate", title="Exchange Rate by Years(2008-2020)")+
  theme_minimal()
  
  
ggplotly(ggplot)

From the graph, it can be observed that, within the given time frame, euro and dollar tend to increase correlatedly, euro being slightly higher than dollar. Thus, Turkish Lira has been in a trend of losing value for years.

3.6.2 Number of Visitor and Exchange Rate Comparison

In this part, total number of visitors from Europe and USA, and the change of exchange rate in Euro and Dollar is examined to better understand if a correlation and/or trend exist between the two. For that purpose, first, the euro_dolar_tidy dataset and related columns(i.e AVRUPATOP and ABD) of mil dataset are joined together by years:

usa_and_total_europe<-mil[1:150,c("Date","United States","Total Europe")]

#joining two data_sets(Here, we observe that it is perfectly matched.(i.e no NA field))
joined_data<-usa_and_total_europe %>%
  left_join(euro_dolar_tidy,by=c("Date"))

Here, you can see how the combined data looks like:

## # A tibble: 6 x 5
##   Date       `United States` `Total Europe` euro_tl_exchange_~ dollar_tl_exchan~
##   <date>               <dbl>          <dbl>              <dbl>             <dbl>
## 1 2008-01-01           22581         551693               1.72              1.17
## 2 2008-02-01           18562         470599               1.75              1.19
## 3 2008-03-01           29479         732094               1.91              1.23
## 4 2008-04-01           38922         861799               2.04              1.3 
## 5 2008-05-01           80430        1542177               1.94              1.25
## 6 2008-06-01           79917        1611362               1.91              1.23

Total visitors from Europe and Euro/TL exchange rate can be analyzed and visualized with the help of ggplot2 and plotly:

By examining both of the graphs, it can be observed that total number of tourists from Europe to Turkey have an oscillating shape, which means the change in the number of visitors is seasonal( most visitors can be observed during summer months) rather than being correlated with the exchange rate of Euro/TL. Although total number of tourists from Europe has an increasing trend after a significant decrease in 2016, there is still not enough evidence to infer that the increase in tourists is due to the increase in Euro/tl currency. The sharp decrease in 2016 might be, mostly, due to the political geopolitical risks related to the political climate of Turkey at the times. Also, the sharp decrease after March-2020 stems from the COVID-19 epidemic and its global effects.

Similar seasonality can be observed in the number of USA visitors as in Europe. However, the month where peak value in the number of visitors is seen slightly differs from European Tourists, and also between the years. For instance, the peak is observed in October during the years 2008-2009 and 2011, whereas in 2012-2013 and 2019 peak value is seen in July during summer season. Similar to Europe, total visitors has decreased during 2016 probably due to same reasons as mentioned above. Also, the sharp decrease after March-2020, again, stems from the COVID-19 epidemic and its global effects.

3.7 The Effect of Distance Between Turkey and the Other Countries

Average number of tourists per year for each country should be calculated firstly to make an analysis for each year. Data is created again and again for the years from 2008 to 2020 to receive these mean values of tourists from each country via for loop. Some rows are deleted to eliminate ambiguous non-country data such as “Other Europe Countries”.

Distance (between Turkey and specified country) vector is created for each respectively to determine correlation between distance and number of tourists per year. In this way, a correlation vector including correlation values of 13 years is received.

cor_data<-c()

for (i in 2008:2020){
data<-raw_data%>%
  select(-c('Haymatlos', 'Grand Total'))%>%
 filter(format(Date, "%Y")==i)%>%
 pivot_longer(.,cols = -Date,
             names_to = "names", 
          values_to = "values")%>%
  group_by(names)%>%
  summarise(avg=mean(values))

data1<-data[-c(2,4,7,8,15,24,25,27,28,36,37), ] 



distance<-c(10066,2350,13004,1295,12551,1977,1080,2500,2045,5389,1646,2709,1538,10708,915,1979,3189,6077,2637,
            9930,867,2314,3760,8849,2843,2793,7770,792,4566,1744,2682,3301,1048,1802,3596,3322,834,2896,2353,1905,
            4429,8686,8630,1386,2136,2818,3376,410,11286,1249,1575,2130,2063,2000,579,2563,1506,1038,8756,1843,
            11801,1413,1054,3068,12427,2451,3337,1852,3674,1129,1873,13348,8105,1363,1638,1811,2604,518,1931,
            3091,6872,2381,2115,1137,894,10388,2884,16624,1122)

cor_data <- c(cor_data, cor(data1$avg,distance))

}

We can plot the correlation data to decide if there is a relation between number of tourists and distance.

years<-c(2008:2020)
yearly_correlation<-data.frame(Years=years,Correlation=cor_data)
ggplot(yearly_correlation,aes(x=Years, y=Correlation))+geom_col()+ylim(-1,1)+theme_light()+
  scale_x_continuous(breaks = seq(2008, 2020, by = 1))+theme(axis.text.x=element_text(angle = 90))

All the values lay below the zero line, which means there is a negative relationship. So, we can say if distance value increases, number of tourists will decrease. However, it is clear that distance has no a big impact on number of tourists because absoulute value of correlation values is almost zero.

3.8 Forecast Plot for Germany

By using auto.arima function and data of previous 150 months, the number of visitors from Germany are forecasted and plotted.

germany<-mil[1:150,c(1,2)]
germany_ts<-ts(germany)
fit<-auto.arima(germany_ts[,2])
summary(fit)
## Series: germany_ts[, 2] 
## ARIMA(3,0,1) with non-zero mean 
## 
## Coefficients:
##          ar1      ar2      ar3      ma1       mean
##       1.2210  -0.2028  -0.3695  -0.6190  382814.82
## s.e.  0.0993   0.1443   0.0843   0.0853   11616.27
## 
## sigma^2 estimated as 1.709e+10:  log likelihood=-1978.49
## AIC=3968.99   AICc=3969.57   BIC=3987.05
## 
## Training set error measures:
##                     ME     RMSE      MAE       MPE     MAPE     MASE       ACF1
## Training set -792.6027 128521.3 101015.6 -146.0323 171.1129 0.825981 0.01975808
fit%>%forecast()%>%autoplot()+xlab("Date")+ylab("Number of Visitors from Germany")